|
자바네트워크I/O |
[1] |
|
등록일:2008-03-11 19:48:54 (0%) 작성자: 제목:사이트에서 URL 뽑아내기 |
|
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.Document;
import javax.swing.text.EditorKit;
import javax.swing.text.ElementIterator;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
/**
 * Downloads one HTML page, parses it with Swing's {@link HTMLEditorKit} and
 * prints every hyperlink found in its anchor ({@code <a href=...>}) tags as an
 * absolute URL, one per line, prefixed with {@code "URL Link = "}.
 *
 * <p>Usage: {@code java EnumerateURLLink [pageUrl]}. When no argument is given
 * the page defaults to {@code http://www.naver.com}.
 */
public class EnumerateURLLink
{
    /** Page scanned when no URL is supplied on the command line. */
    private static final String DEFAULT_PAGE = "http://www.naver.com";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the URL of the page to scan
     */
    public static void main(String[] args)
    {
        // Redirect responses are not followed; the body of the first response
        // is what gets parsed.
        HttpURLConnection.setFollowRedirects(false);
        EditorKit kit = new HTMLEditorKit();
        Document doc = kit.createDefaultDocument();
        // The Document class does not yet handle
        // charset's properly.
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

        String page = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;
        try
        {
            // Create a reader on the HTML content.
            // NOTE(review): the reader decodes with the platform default
            // charset; pages in another encoding may be mis-decoded.
            URL url_ = new URL(page);
            URLConnection conn = url_.openConnection();
            Reader rd = new InputStreamReader(conn.getInputStream());
            try
            {
                // Parse the HTML.
                kit.read(rd, doc, 0);
            }
            finally
            {
                // Release the network stream even when parsing fails.
                rd.close();
            }

            // Iterate through the elements of the HTML document.
            ElementIterator it = new ElementIterator(doc);
            javax.swing.text.Element elem;
            while ((elem = it.next()) != null)
            {
                Object anchor = elem.getAttributes().getAttribute(HTML.Tag.A);
                if (!(anchor instanceof javax.swing.text.AttributeSet))
                {
                    continue; // element is not inside an <a> tag
                }
                Object href = ((javax.swing.text.AttributeSet) anchor)
                        .getAttribute(HTML.Attribute.HREF);
                // Anchors without an href (e.g. <a name="...">) yield null here.
                String link = resolveLink(url_,
                        (href instanceof String) ? (String) href : null);
                if (link != null)
                {
                    System.out.println("URL Link = " + link);
                }
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Turns the raw value of an {@code href} attribute into an absolute URL.
     *
     * <p>Links that already contain {@code "://"} are returned unchanged
     * (apart from trimming). Everything else is resolved against {@code base}
     * with {@link URL#URL(URL, String)}, which keeps the base URL's scheme and
     * port and ignores its query string when computing the directory.
     *
     * @param base the URL of the page the link was found on
     * @param href the raw attribute value; may be {@code null}
     * @return the absolute link, or {@code null} when the link should be
     *         skipped: {@code href} is {@code null} or blank, uses the
     *         {@code mailto:} or {@code javascript:} scheme, or cannot be
     *         resolved into a URL (for example an unsupported scheme)
     */
    static String resolveLink(URL base, String href)
    {
        if (href == null)
        {
            return null;
        }
        String link = href.trim();
        if (link.length() == 0)
        {
            return null;
        }

        // Match on the scheme prefix only, so that ordinary pages whose path
        // merely contains the word "javascript" are still reported.
        String lower = link.toLowerCase(java.util.Locale.ROOT);
        if (lower.startsWith("mailto:") || lower.startsWith("javascript:"))
        {
            return null;
        }

        // Already absolute: report as found.
        if (link.indexOf("://") != -1)
        {
            return link;
        }

        try
        {
            return new URL(base, link).toString();
        }
        catch (java.net.MalformedURLException e)
        {
            // Not resolvable as a URL (e.g. "tel:" links); skip it.
            return null;
        }
    }
}
[본문링크] 사이트에서 URL 뽑아내기
|
[1]
|
|
|
|
|
코멘트(이글의 트랙백 주소:/cafe/tb_receive.php?no=2507 |
|
|
|
|
|
|
|
|
|
Copyright ⓒ2005, SSISO Community. All Rights Reserved.
|
|
|